使用 PySpark 函式庫實作 word count 程式。
pip install pyspark
import sys
from pyspark import SparkContext, SparkConf
# Default locations used when main() is called without arguments.
DEFAULT_INPUT_PATH = "/Users/allen/Dropbox/Code_Section/CodeWS/30/input.txt"
DEFAULT_OUTPUT_PATH = "/Users/allen/Dropbox/Code_Section/CodeWS/30/output/"


def split_words(line):
    """Return the whitespace-separated words of *line*.

    ``str.split()`` with no separator collapses runs of whitespace and
    drops leading/trailing whitespace, so consecutive spaces, tabs and
    blank lines never produce empty-string "words" that would otherwise
    be counted alongside the real ones.
    """
    return line.split()


def main(input_path=DEFAULT_INPUT_PATH, output_path=DEFAULT_OUTPUT_PATH):
    """Count the occurrences of each word in a text file with Spark.

    Args:
        input_path: Text file to read; passed to ``SparkContext.textFile``.
        output_path: Directory the ``(word, count)`` pairs are written to
            via ``RDD.saveAsTextFile``.
    """
    # Create the Spark context (local master, same app name as before).
    sc = SparkContext("local", "PySpark Word Count")
    try:
        # Read the input file and break every line into words.
        words = sc.textFile(input_path).flatMap(split_words)
        # Count how many times each word occurs.
        word_counts = (
            words.map(lambda word: (word, 1))
            .reduceByKey(lambda a, b: a + b)
        )
        # Write the counts out.
        word_counts.saveAsTextFile(output_path)
    finally:
        # Always release the context, even when the job fails, so a
        # later run in the same process can create a new one.
        sc.stop()


if __name__ == "__main__":
    main()